/* volc rtc example code

   This example code is in the Public Domain (or CC0 licensed, at your option.)

   Unless required by applicable law or agreed to in writing, this
   software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
   CONDITIONS OF ANY KIND, either express or implied.
*/
#include <string.h>

#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/semphr.h"
#include "freertos/queue.h"

#include "esp_log.h"
#include "sdkconfig.h"

#include "esp_timer.h"
#include "audio_recorder.h"
#include "recorder_sr.h"
#include "audio_pipeline.h"
#include "audio_thread.h"
#include "raw_stream.h"
#include "audio_mem.h"
#include "esp_delegate.h"
#include "esp_dispatcher.h"

#include "VolcEngineRTCLite.h"
#include "coze_http_request.h"
#include "audio_processor.h"
#include "volc_rtc_message.h"

/* Log tag passed to every ESP_LOGx call in this file. */
static const char* TAG = "volc_rtc";

#define STATS_TASK_PRIO          (5)
#define JOIN_EVENT_BIT           (1 << 0)  /* Bit raised on join_event by the join-success callback */
#define WAIT_DESTORY_EVENT_BIT   (1 << 0)  /* Bit volc_rtc_deinit() waits for on wait_destory_event */
#define WAKEUP_REC_READING       (1 << 0)  /* Bit on wakeup_event that lets voice_read_task read and send audio */
#define DEFAULT_MAX_QUEUE_NUM    (30)
#define DEFAULT_QUEUE_CACHE_NUM  (10)

/*
 * Release a handle only if it is non-NULL, then reset it to NULL.
 *   x  - lvalue holding the handle (it is evaluated more than once, so it
 *        must not have side effects)
 *   fn - function called with x to release it
 */
#define volc_rtc_safe_free(x, fn) do {   \
    if (x) {                             \
        fn(x);                           \
        x = NULL;                        \
    }                                    \
} while (0)

/*
 * One received audio frame as it travels through frame_q.
 * The buffer is allocated in byte_rtc_on_audio_data() and released by
 * audio_data_process_task() after it has been written to the player.
 */
typedef struct {
    char *frame_ptr;  /* Heap copy of the frame payload */
    int frame_len;    /* Payload length in bytes */
} frame_package_t;

/* Room membership state stored in volc_rtc_t.join_state. */
typedef enum {
    RTC_JOIN_IDLE = 0,  /* Set before the engine is created */
    RTC_JOIN,           /* Set after the join-success event has been received */
    RTC_LEAVE,
} rtc_join_state_t;

/* All state of the RTC example; a single static instance (s_volc_rtc) is used. */
struct volc_rtc_t {
    recorder_pipeline_handle_t record_pipeline;      /* Opened and started in open_audio_pipeline() */
    player_pipeline_handle_t   player_pipeline;      /* Opened and started in open_audio_pipeline() */
    void                      *recorder_engine;      /* Handle returned by audio_record_engine_init() */
    byte_rtc_engine_t          engine;               /* Handle returned by byte_rtc_create() */
    EventGroupHandle_t         join_event;           /* JOIN_EVENT_BIT is set by the join-success callback */
    EventGroupHandle_t         wait_destory_event;   /* Waited on by volc_rtc_deinit() */
    EventGroupHandle_t         wakeup_event;         /* WAKEUP_REC_READING gates voice_read_task in wakeup mode */
    QueueHandle_t              frame_q;              /* frame_package_t items: RTC audio callback -> audio_data_process_task */
    coze_http_req_result_t    *user_info;            /* app_id / room_id / uid / token used to join the room */
    esp_dispatcher_handle_t    esp_dispatcher;       /* From esp_dispatcher_get_delegate_handle() */
    rtc_join_state_t           join_state;
    bool                       byte_rtc_running;     /* Only cleared (in volc_rtc_deinit()) within this file */
    bool                       data_proc_running;    /* Loop condition of audio_data_process_task */
    esp_timer_handle_t         int_timer_hd;         /* Created in volc_rtc_init() when CONFIG_LANGUAGE_WAKEUP_MODE is set */
#if defined(CONFIG_AUDIO_SUPPORT_OPUS_DECODER)
#define DEALULT_OPUS_DATA_CHACHE_SIZE (512)
    char                    *opus_data_cache;        /* Scratch buffer holding the length-prefixed Opus frame */
    int                      opus_data_cache_len;    /* Size bookkeeping for opus_data_cache */
#endif  // CONFIG_AUDIO_SUPPORT_OPUS_DECODER
};

/* The one and only RTC context; zero-initialised because it has static storage. */
static struct volc_rtc_t s_volc_rtc;

/* Writes one received audio frame to the player pipeline (defined below). */
static void audio_pull_data_process(char *ptr, int len);

// byte rtc lite callbacks
/*
 * RTC callback: the local user joined the room.
 * Logs the event and raises JOIN_EVENT_BIT on s_volc_rtc.join_event, which
 * byte_rtc_engine_create() is blocked on.
 */
static void byte_rtc_on_join_room_success(byte_rtc_engine_t engine, const char *channel, int elapsed_ms, bool ms)
{
    (void)engine;
    (void)ms;

    ESP_LOGI(TAG, "join channel success %s elapsed %d ms now %d ms\n", channel, elapsed_ms, elapsed_ms);
    xEventGroupSetBits(s_volc_rtc.join_event, JOIN_EVENT_BIT);
}

/* RTC callback: the room was re-joined. Only logs the channel name. */
static __attribute__((unused)) void byte_rtc_on_rejoin_room_success(byte_rtc_engine_t engine, const char *channel, int elapsed_ms)
{
    (void)engine;
    (void)elapsed_ms;

    ESP_LOGI(TAG, "rejoin channel success %s\n", channel);
}

/* RTC callback: a remote user joined the channel. Only logs channel and user name. */
static void byte_rtc_on_user_joined(byte_rtc_engine_t engine, const char *channel, const char *user_name, int elapsed_ms)
{
    (void)engine;
    (void)elapsed_ms;

    ESP_LOGI(TAG, "remote user joined  %s:%s", channel, user_name);
}

/* RTC callback: a remote user went offline. Only logs channel and user name. */
static void byte_rtc_on_user_offline(byte_rtc_engine_t engine, const char *channel, const char *user_name, int reason)
{
    (void)engine;
    (void)reason;

    ESP_LOGI(TAG, "remote user offline  %s:%s", channel, user_name);
}

/* RTC callback: a remote user changed their audio mute state. Only logs it. */
static void byte_rtc_on_user_mute_audio(byte_rtc_engine_t engine, const char *channel, const char *user_name, int muted)
{
    (void)engine;

    ESP_LOGI(TAG, "remote user mute audio  %s:%s %d", channel, user_name, muted);
}

/* RTC callback: a remote user changed their video mute state. Only logs it. */
static void byte_rtc_on_user_mute_video(byte_rtc_engine_t engine, const char *channel, const char *user_name, int muted)
{
    (void)engine;

    ESP_LOGI(TAG, "remote user mute video  %s:%s %d", channel, user_name, muted);
}

/* RTC callback: the connection to the channel was lost. Only logs the channel name. */
static __attribute__((unused)) void byte_rtc_on_connection_lost(byte_rtc_engine_t engine, const char *channel)
{
    (void)engine;

    ESP_LOGI(TAG, "connection Lost  %s", channel);
}

/*
 * RTC callback: a room-level error was reported.
 * Logs channel, error code and message; a NULL message is replaced by a
 * placeholder so the format call never receives a NULL string.
 */
static void byte_rtc_on_room_error(byte_rtc_engine_t engine, const char *channel, int code, const char *msg)
{
    (void)engine;

    const char *text = msg;
    if (text == NULL) {
        text = "UnKown";
    }
    ESP_LOGE(TAG, "error occur %s %d %s", channel, code, text);
}

// remote audio
/*
 * RTC callback: a remote audio frame arrived.
 *
 * The payload is copied into a heap buffer and posted to s_volc_rtc.frame_q;
 * audio_data_process_task() consumes the queue and frees the buffer.
 * Frames are dropped when the player pipeline is idle, when the payload is
 * empty, when memory is exhausted, or when the queue stays full for 10 ms.
 *
 * data_ptr / data_len describe the payload; the other arguments are only logged.
 */
static void byte_rtc_on_audio_data(byte_rtc_engine_t engine, const char *room, const char *uid, uint16_t sent_ts,
                                   audio_data_type_e codec, const void *data_ptr, size_t data_len)
{
    (void)engine;
    ESP_LOGD(TAG, "remote audio data %s %s %d %d %p %zu", room, uid, sent_ts, codec, data_ptr, data_len);

    /* Default to idle so the frame is dropped if the getter leaves the value untouched. */
    pipe_player_state_e state = PIPE_STATE_IDLE;
    player_pipeline_get_state(s_volc_rtc.player_pipeline, &state);
    if (state == PIPE_STATE_IDLE) {
        return;
    }
    if (data_ptr == NULL || data_len == 0) {
        return;
    }

    frame_package_t frame = { 0 };
    frame.frame_ptr = audio_calloc(1, data_len);
    if (frame.frame_ptr == NULL) {
        ESP_LOGE(TAG, "no memory for audio frame (%zu bytes), frame dropped", data_len);
        return;
    }
    memcpy(frame.frame_ptr, data_ptr, data_len);
    frame.frame_len = (int)data_len;

    if (xQueueSend(s_volc_rtc.frame_q, &frame, pdMS_TO_TICKS(10)) != pdPASS) {
        ESP_LOGW(TAG, "audio frame queue full");
        /* Ownership was not transferred to the consumer, so release the copy here. */
        audio_free(frame.frame_ptr);
    }
}

// remote video
/* RTC callback for remote video frames. Intentionally does nothing. */
static void byte_rtc_on_video_data(byte_rtc_engine_t engine, const char *channel, const char *uid, uint16_t sent_ts,
                                   video_data_type_e codec, int is_key_frame,
                                   const void *data_ptr, size_t data_len)
{
    (void)engine;
    (void)channel;
    (void)uid;
    (void)sent_ts;
    (void)codec;
    (void)is_key_frame;
    (void)data_ptr;
    (void)data_len;
}

// remote message
/*
 * RTC callback: a message from a remote user arrived.
 *
 * message / size describe the payload, which is forwarded unchanged to
 * volc_rtc_message_process(). The payload is a sized byte buffer, so the
 * debug log bounds the print with the size instead of relying on a NUL
 * terminator, and does not print binary payloads as text at all.
 */
void on_message_received(byte_rtc_engine_t engine, const char *room, const char *uid, const uint8_t *message, int size, bool binary)
{
    (void)engine;
    (void)room;

    if (binary || message == NULL || size <= 0) {
        ESP_LOGD(TAG, "on_message_received uid: %s, message size: %d, binary: %d", uid, size, (int)binary);
    } else {
        ESP_LOGD(TAG, "on_message_received uid: %s, message: %.*s, message size: %d", uid, size, (const char *)message, size);
    }
    volc_rtc_message_process(message, size);
}

/* RTC callback requesting a video key frame. Intentionally does nothing. */
static void on_key_frame_gen_req(byte_rtc_engine_t engine, const char *channel, const char *uid)
{
    (void)engine;
    (void)channel;
    (void)uid;
}
// byte rtc lite callbacks end.

/*
 * Write one received audio frame to the player pipeline's raw input.
 *
 * ptr / len describe the frame; NULL or empty frames are ignored.
 *
 * With CONFIG_AUDIO_SUPPORT_OPUS_DECODER the frame is first repackaged as
 * a 2-byte big-endian length followed by the data (Opus is VBR). The
 * repackaged frame is built in s_volc_rtc.opus_data_cache, which is grown
 * on demand; opus_data_cache_len always holds the buffer's capacity in bytes.
 */
static void audio_pull_data_process(char *ptr, int len)
{
    if (ptr == NULL || len <= 0) {
        return;
    }

    char *data_ptr = ptr;
    int data_len = len;
    /* Since OPUS is in VBR mode, it needs to be packaged into a length + data format first then to decoder*/
#if defined (CONFIG_AUDIO_SUPPORT_OPUS_DECODER)

#define frame_length_prefix (2)
    if (len > 0xFFFF) {
        /* The length prefix is only 16 bits wide; a larger frame cannot be described. */
        ESP_LOGE(TAG, "opus frame too large for length prefix: %d bytes, frame dropped", len);
        return;
    }

    /* The buffer must hold the prefix AND the payload. */
    const int required = len + frame_length_prefix;
    if (s_volc_rtc.opus_data_cache == NULL || s_volc_rtc.opus_data_cache_len < required) {
        char *grown = audio_realloc(s_volc_rtc.opus_data_cache, required);
        if (grown == NULL) {
            /* The old buffer (if any) is still valid and still owned by s_volc_rtc. */
            ESP_LOGE(TAG, "no memory to grow opus cache to %d bytes, frame dropped", required);
            return;
        }
        s_volc_rtc.opus_data_cache = grown;
        s_volc_rtc.opus_data_cache_len = required;
    }

    s_volc_rtc.opus_data_cache[0] = (len >> 8) & 0xFF;
    s_volc_rtc.opus_data_cache[1] = len & 0xFF;
    memcpy(s_volc_rtc.opus_data_cache + frame_length_prefix, ptr, len);
    data_ptr = s_volc_rtc.opus_data_cache;
    data_len = required;
#endif // CONFIG_AUDIO_SUPPORT_OPUS_DECODER
    raw_stream_write(player_pipeline_get_raw_write(s_volc_rtc.player_pipeline), data_ptr, data_len);
}

/*
 * Tone player status callback (registered through audio_tone_init()).
 * Restarts the player pipeline once the tone reports AEL_STATUS_STATE_FINISHED;
 * every other status is ignored.
 */
static void audio_tone_player_event_cb(audio_element_status_t evt)
{
    if (evt != AEL_STATUS_STATE_FINISHED) {
        return;
    }
    player_pipeline_run(s_volc_rtc.player_pipeline);
}

/*
 * Dispatcher action: play the tone whose URI string is carried in arg->data.
 * Always returns ESP_OK; instance and result are not used.
 */
static esp_err_t dispatcher_audio_play(void *instance, action_arg_t *arg, action_result_t *result)
{
    (void)instance;
    (void)result;

    char *tone_uri = (char *)arg->data;
    audio_tone_play(tone_uri);
    return ESP_OK;
}

/*
 * Recorder engine event callback (passed to audio_record_engine_init()).
 *
 * Only acts when CONFIG_LANGUAGE_WAKEUP_MODE is enabled:
 *  - wakeup start: stops the player, plays the wakeup tone through the
 *    dispatcher and sets WAKEUP_REC_READING so voice_read_task starts sending.
 *  - wakeup end:   clears WAKEUP_REC_READING and stops the player.
 * VAD events and anything else are ignored. Always returns ESP_OK.
 */
static esp_err_t rec_engine_cb(audio_rec_evt_t *event, void *user_data)
{
    (void)user_data;

    switch (event->type) {
        case AUDIO_REC_WAKEUP_START: {
#if CONFIG_LANGUAGE_WAKEUP_MODE
            ESP_LOGI(TAG, "rec_engine_cb - AUDIO_REC_WAKEUP_START");
            player_pipeline_stop(s_volc_rtc.player_pipeline);

            action_arg_t tone_arg = {0};
            tone_arg.data = (void *)"spiffs://spiffs/dingding.wav";
            action_result_t tone_result = {0};
            esp_dispatcher_execute_with_func(s_volc_rtc.esp_dispatcher, dispatcher_audio_play, NULL, &tone_arg, &tone_result);

            xEventGroupSetBits(s_volc_rtc.wakeup_event, WAKEUP_REC_READING);
#endif // CONFIG_LANGUAGE_WAKEUP_MODE
            break;
        }

        case AUDIO_REC_WAKEUP_END:
#if CONFIG_LANGUAGE_WAKEUP_MODE
            xEventGroupClearBits(s_volc_rtc.wakeup_event, WAKEUP_REC_READING);
            player_pipeline_stop(s_volc_rtc.player_pipeline);
            ESP_LOGI(TAG, "rec_engine_cb - AUDIO_REC_WAKEUP_END");
#endif // CONFIG_LANGUAGE_WAKEUP_MODE
            break;

        case AUDIO_REC_VAD_START:
        case AUDIO_REC_VAD_END:
        default:
            /* Nothing to do for these events. */
            break;
    }

    return ESP_OK;
}

static esp_err_t open_audio_pipeline()
{
    s_volc_rtc.record_pipeline = recorder_pipeline_open();
    s_volc_rtc.player_pipeline = player_pipeline_open();
    recorder_pipeline_run(s_volc_rtc.record_pipeline);
    player_pipeline_run(s_volc_rtc.player_pipeline);
    return ESP_OK;
}

/*
 * Shut down and destroy the RTC engine, if one exists.
 * Calls byte_rtc_fini(), waits one second, calls byte_rtc_destory(), then
 * clears the stored handle.
 */
static void byte_rtc_engine_destroy()
{
    if (!s_volc_rtc.engine) {
        return;
    }

    byte_rtc_fini(s_volc_rtc.engine);
    vTaskDelay(pdMS_TO_TICKS(1000));
    byte_rtc_destory(s_volc_rtc.engine);
    s_volc_rtc.engine = NULL;
}

static esp_err_t byte_rtc_engine_create()
{
#ifdef CONFIG_COZE_REQUEST
    #ifdef CONFIG_AUDIO_SUPPORT_OPUS_DECODER
        coze_http_req_audio_type_e audio_type = COZE_HTTP_REQ_AUDIO_TYPE_OPUS;
    #else
        coze_http_req_audio_type_e audio_type = COZE_HTTP_REQ_AUDIO_TYPE_G711A;
    #endif
    coze_http_req_config_t http_config = COZE_HTTP_DEFAULT_CONFIG();
    http_config.uri = CONFIG_COZE_URL"/startvoicechat";
    http_config.authorization = CONFIG_COZE_AUTHORIZATION;
    http_config.bot_id = CONFIG_COZE_BOTID;
    http_config.audio_type = audio_type,
    s_volc_rtc.user_info = coze_http_request(&http_config, COZE_HTTP_REQ_SVC_TYPE_RTC);
    
#else
    s_volc_rtc.user_info = audio_calloc(1, sizeof(coze_http_req_result_t));
    s_volc_rtc.user_info->app_id = CONFIG_APPID;
    s_volc_rtc.user_info->room_id = CONFIG_ROOMID;
    s_volc_rtc.user_info->uid = CONFIG_UID;
    s_volc_rtc.user_info->token = CONFIG_TOKEN;
#endif

    ESP_LOGI(TAG, "app_id: %s, room_id: %s, uid: %s, token: %s", 
            s_volc_rtc.user_info->app_id, s_volc_rtc.user_info->room_id, s_volc_rtc.user_info->uid, s_volc_rtc.user_info->token);
#if defined (CONFIG_AUDIO_SUPPORT_OPUS_DECODER)
    s_volc_rtc.opus_data_cache_len = DEALULT_OPUS_DATA_CHACHE_SIZE;
    s_volc_rtc.opus_data_cache = audio_calloc(1, s_volc_rtc.opus_data_cache_len);
#endif // CONFIG_AUDIO_SUPPORT_OPUS_DECODER

    byte_rtc_event_handler_t handler = {
        .on_join_room_success       =   byte_rtc_on_join_room_success,
        .on_room_error              =   byte_rtc_on_room_error,
        .on_user_joined             =   byte_rtc_on_user_joined,
        .on_user_offline            =   byte_rtc_on_user_offline,
        .on_user_mute_audio         =   byte_rtc_on_user_mute_audio,
        .on_user_mute_video         =   byte_rtc_on_user_mute_video,
        .on_audio_data              =   byte_rtc_on_audio_data,
        .on_video_data              =   byte_rtc_on_video_data,
        .on_key_frame_gen_req       =   on_key_frame_gen_req,
        .on_message_received        =   on_message_received,
    };
    s_volc_rtc.join_state = RTC_JOIN_IDLE;
    s_volc_rtc.engine = byte_rtc_create(s_volc_rtc.user_info->app_id, &handler);

    byte_rtc_set_log_level(s_volc_rtc.engine, BYTE_RTC_LOG_LEVEL_ERROR);
#ifdef RTC_TEST_ENV
    byte_rtc_set_params(s_volc_rtc.engine, "{\"env\":2}"); // test env
#endif
    byte_rtc_set_params(s_volc_rtc.engine, "{\"debug\":{\"log_to_console\":1}}"); 
    byte_rtc_set_params(s_volc_rtc.engine,"{\"rtc\":{\"thread\":{\"pinned_to_core\":1}}}");
    byte_rtc_set_params(s_volc_rtc.engine,"{\"rtc\":{\"thread\":{\"priority\":6}}}");
    // byte_rtc_set_params(s_volc_rtc.engine,"{\"rtc\":{\"network\":{\"audio_jitter_buffer_target_delay_min_ms\":200}}}");
    // byte_rtc_set_params(s_volc_rtc.engine,"{\"rtc\":{\"license\":{\"enable\":1}}}");

    byte_rtc_init(s_volc_rtc.engine);
#if defined (CONFIG_AUDIO_SUPPORT_OPUS_DECODER)
    byte_rtc_set_audio_codec(s_volc_rtc.engine, AUDIO_CODEC_TYPE_OPUS);
#elif defined (CONFIG_AUDIO_SUPPORT_G711A_DECODER)
    byte_rtc_set_audio_codec(s_volc_rtc.engine, AUDIO_CODEC_TYPE_G711A);
#else // AACLC Encoder
    byte_rtc_set_audio_codec(s_volc_rtc.engine, AUDIO_CODEC_TYPE_AACLC);
#endif // CONFIG_AUDIO_SUPPORT_OPUS_DECODER
    open_audio_pipeline();

    ESP_LOGI(TAG, "start join room\n");
    byte_rtc_room_options_t options = { 0 };
    options.auto_subscribe_audio    = 1;
    options.auto_subscribe_video    = 0;
    options.auto_publish_audio      = 1;
    options.auto_publish_video      = 0;
    byte_rtc_join_room(s_volc_rtc.engine, s_volc_rtc.user_info->room_id, s_volc_rtc.user_info->uid, s_volc_rtc.user_info->token, &options);
    // Default wait time is forever
    xEventGroupWaitBits(s_volc_rtc.join_event, JOIN_EVENT_BIT, pdTRUE, pdTRUE, portMAX_DELAY);
    ESP_LOGI(TAG, "join room success\n");
    s_volc_rtc.join_state = RTC_JOIN;
    return ESP_OK;
}

static void audio_data_process_task(void *args)
{
    frame_package_t frame = { 0 };
    s_volc_rtc.data_proc_running = true;
    while (s_volc_rtc.data_proc_running) {
        xQueueReceive(s_volc_rtc.frame_q, &frame, portMAX_DELAY);
        if (frame.frame_ptr) {
            audio_pull_data_process(frame.frame_ptr, frame.frame_len);
            audio_free(frame.frame_ptr);
        }
    }
    vTaskDelete(NULL);
}

static void voice_read_task(void *args)
{
    const int voice_data_read_sz = recorder_pipeline_get_default_read_size(s_volc_rtc.record_pipeline);
    uint8_t *voice_data = audio_calloc(1, voice_data_read_sz);
    bool runing = true;

#if defined (CONFIG_AUDIO_SUPPORT_OPUS_DECODER)
    audio_frame_info_t audio_frame_info = {.data_type = AUDIO_DATA_TYPE_OPUS};
#elif defined (CONFIG_AUDIO_SUPPORT_G711A_DECODER)
    audio_frame_info_t audio_frame_info = {.data_type = AUDIO_DATA_TYPE_PCMA};
#else
    audio_frame_info_t audio_frame_info = {.data_type = AUDIO_DATA_TYPE_AACLC};
#endif // CONFIG_AUDIO_SUPPORT_OPUS_DECODER

#if defined (CONFIG_LANGUAGE_WAKEUP_MODE)
    TickType_t wait_tm = portMAX_DELAY;
#else
    TickType_t wait_tm = 0;
#endif // CONFIG_LANGUAGE_WAKEUP_MODE
    while (runing) {
    #if defined (CONFIG_LANGUAGE_WAKEUP_MODE)
        EventBits_t bits = xEventGroupWaitBits(s_volc_rtc.wakeup_event, WAKEUP_REC_READING , false, true, wait_tm);
        if (bits & WAKEUP_REC_READING) {
            int ret = audio_recorder_data_read(s_volc_rtc.recorder_engine, voice_data, voice_data_read_sz, portMAX_DELAY);
            if (ret == 0 || ret == -1) {
                if (ret == 0) {
                    vTaskDelay(15 / portTICK_PERIOD_MS);
                    continue;
                }
                ESP_LOGE(TAG, "audio_recorder_data_read failed, ret: %d\n", ret);
                xEventGroupClearBits(s_volc_rtc.wakeup_event,  WAKEUP_REC_READING);
            } else {
                byte_rtc_send_audio_data(s_volc_rtc.engine, s_volc_rtc.user_info->room_id, voice_data, voice_data_read_sz, &audio_frame_info);
            }
        }
    #else
        int read_len = audio_recorder_data_read(s_volc_rtc.recorder_engine, voice_data, voice_data_read_sz, portMAX_DELAY);
        if (read_len == voice_data_read_sz) {
            byte_rtc_send_audio_data(s_volc_rtc.engine, s_volc_rtc.user_info->room_id, voice_data, voice_data_read_sz, &audio_frame_info);
        }
    #endif
    }
    xEventGroupClearBits(s_volc_rtc.wakeup_event, WAKEUP_REC_READING);
    audio_free(voice_data);
    vTaskDelete(NULL);
}

/*
 * Configure log verbosity: INFO for everything by default, then ERROR-only
 * for a fixed list of audio framework tags.
 */
static void log_clear(void)
{
    static const char *const error_only_tags[] = {
        "AUDIO_THREAD",
        "i2c_bus_v2",
        "AUDIO_HAL",
        "AUDIO_PIPELINE",
        "AUDIO_ELEMENT",
        "I2S_STREAM_IDF5.x",
        "RSP_FILTER",
        "AUDIO_EVT",
    };
    const size_t tag_count = sizeof(error_only_tags) / sizeof(error_only_tags[0]);

    /* The wildcard must be applied first so the per-tag overrides below win. */
    esp_log_level_set("*", ESP_LOG_INFO);
    for (size_t i = 0; i < tag_count; i++) {
        esp_log_level_set(error_only_tags[i], ESP_LOG_ERROR);
    }
}

esp_err_t volc_rtc_init(void)
{
    log_clear();
    audio_tone_init(audio_tone_player_event_cb);

#if CONFIG_LANGUAGE_WAKEUP_MODE
    esp_timer_create_args_t timer_cfg = {
        .callback = rtc_int_timer_expired,
        .arg = NULL,
        .dispatch_method = ESP_TIMER_TASK,
        .name = "int_timer",
    };
    esp_timer_create(&timer_cfg, &s_volc_rtc.int_timer_hd);
#endif

    s_volc_rtc.join_event = xEventGroupCreate();
    s_volc_rtc.wait_destory_event = xEventGroupCreate();
    s_volc_rtc.wakeup_event = xEventGroupCreate();
    s_volc_rtc.frame_q = xQueueCreate(30, sizeof(frame_package_t));
    s_volc_rtc.esp_dispatcher = esp_dispatcher_get_delegate_handle();

    // volc_rtc_message_init(VOLC_RTC_MESSAGE_TYPE_SUBTITLE | VOLC_RTC_MESSAGE_TYPE_FUNCTION_CALL);
    byte_rtc_engine_create();
    s_volc_rtc.recorder_engine = audio_record_engine_init(s_volc_rtc.record_pipeline, rec_engine_cb);
    vTaskDelay(pdMS_TO_TICKS(200));
    audio_thread_create(NULL, "voice_read_task", voice_read_task, (void *)NULL, 5 * 1024, 5, true, 0);
    audio_thread_create(NULL, "audio_data_process_task", audio_data_process_task, (void *)NULL, 5 * 1024, 10, true, 0);
    return ESP_OK;
}

/*
 * Tear down the RTC example: clear the running flags, wait for
 * WAIT_DESTORY_EVENT_BIT, destroy the RTC engine and release the join event
 * group, the destroy event group and the timer. Always returns ESP_OK.
 *
 * NOTE(review): nothing in the code visible in this file sets
 * WAIT_DESTORY_EVENT_BIT, so the wait below may never return - confirm where
 * the bit is raised.
 */
esp_err_t volc_rtc_deinit(void)
{
    s_volc_rtc.byte_rtc_running = false;
    s_volc_rtc.data_proc_running = false;

    xEventGroupWaitBits(s_volc_rtc.wait_destory_event, WAIT_DESTORY_EVENT_BIT, pdTRUE, pdTRUE, portMAX_DELAY);
    byte_rtc_engine_destroy();

    if (s_volc_rtc.join_event) {
        vEventGroupDelete(s_volc_rtc.join_event);
        s_volc_rtc.join_event = NULL;
    }
    if (s_volc_rtc.wait_destory_event) {
        vEventGroupDelete(s_volc_rtc.wait_destory_event);
        s_volc_rtc.wait_destory_event = NULL;
    }
    if (s_volc_rtc.int_timer_hd) {
        esp_timer_delete(s_volc_rtc.int_timer_hd);
        s_volc_rtc.int_timer_hd = NULL;
    }
    return ESP_OK;
}
